{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 43,
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from xgboost import XGBRFRegressor as XGBR\n",
    "from numpy import *\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.linear_model import LinearRegression as LinearRegression\n",
    "from sklearn.model_selection import KFold,cross_val_score as CVS,train_test_split as TTS\n",
    "from sklearn.metrics import mean_squared_error as MSE\n",
    "wine = pd.read_excel(\"RF_lgb_Relation.xlsx\",header=0)\n",
    "wine0 = pd.read_csv(\"after_process_database.csv\",header=0)\n",
    "feature20 = pd.read_csv(\"final_features.csv\",header=0)\n",
    "feature_20=feature20.loc[:,'name'].values\n",
    "after_feature=wine.loc[:49,'Rela删除后的RF'].values\n",
    "index50 = wine0.loc[:,after_feature].values\n",
    "index20 = wine0.loc[:,feature_20].values\n",
    "yList = wine0.pIC50.values\n",
    "index20.shape"
   ],
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "(1974, 20)"
      ]
     },
     "metadata": {},
     "execution_count": 43
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "source": [
    "# X_train,X_test,y_train,y_test = TTS(index50,yList,test_size=0.3)#50维度数据\n",
    "# X_train.shape"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "source": [
    "# reg= XGBR(n_estimator = 100).fit(X_train,y_train)#100ge 树"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "source": [
    "# reg.predict(X_test)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "source": [
    "# MSE(y_test,reg.predict(X_test))"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "source": [
    "# reg.feature_importances_"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "source": [
    "#交叉验证，导入没有训练的模型\n",
    "reg1= XGBR(n_estimator = 150)\n",
    "# CVS(reg1,X_train,y_train,cv=5)#只在训练集上，更严谨\n"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "source": [
    "MSE=CVS(reg1,index20,yList,cv=5,scoring='neg_mean_squared_error')#只在训练集上，更严谨\n"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[22:59:44] WARNING: ../src/learner.cc:573: \n",
      "Parameters: { \"n_estimator\" } might not be used.\n",
      "\n",
      "  This may not be accurate due to some parameters are only used in language bindings but\n",
      "  passed down to XGBoost core.  Or some parameters are not used but slip through this\n",
      "  verification. Please open an issue if you find above cases.\n",
      "\n",
      "\n",
      "[23:01:48] WARNING: ../src/learner.cc:573: \n",
      "Parameters: { \"n_estimator\" } might not be used.\n",
      "\n",
      "  This may not be accurate due to some parameters are only used in language bindings but\n",
      "  passed down to XGBoost core.  Or some parameters are not used but slip through this\n",
      "  verification. Please open an issue if you find above cases.\n",
      "\n",
      "\n",
      "[23:07:27] WARNING: ../src/learner.cc:573: \n",
      "Parameters: { \"n_estimator\" } might not be used.\n",
      "\n",
      "  This may not be accurate due to some parameters are only used in language bindings but\n",
      "  passed down to XGBoost core.  Or some parameters are not used but slip through this\n",
      "  verification. Please open an issue if you find above cases.\n",
      "\n",
      "\n",
      "[23:13:26] WARNING: ../src/learner.cc:573: \n",
      "Parameters: { \"n_estimator\" } might not be used.\n",
      "\n",
      "  This may not be accurate due to some parameters are only used in language bindings but\n",
      "  passed down to XGBoost core.  Or some parameters are not used but slip through this\n",
      "  verification. Please open an issue if you find above cases.\n",
      "\n",
      "\n",
      "[23:19:38] WARNING: ../src/learner.cc:573: \n",
      "Parameters: { \"n_estimator\" } might not be used.\n",
      "\n",
      "  This may not be accurate due to some parameters are only used in language bindings but\n",
      "  passed down to XGBoost core.  Or some parameters are not used but slip through this\n",
      "  verification. Please open an issue if you find above cases.\n",
      "\n",
      "\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "source": [
    "MSE.mean()"
   ],
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "-1.1010830001853962"
      ]
     },
     "metadata": {},
     "execution_count": 51
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "source": [
    "import pickle\n",
    "pickle.dump(reg1, open(\"reg1_20.dat\", \"wb\"))"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "source": [
    "reg1_model = pickle.load(open(\"reg1_20.dat\", \"rb\"))\n",
    "reg1.predict(index20)"
   ],
   "outputs": [
    {
     "output_type": "error",
     "ename": "NotFittedError",
     "evalue": "need to call fit or load_model beforehand",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNotFittedError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-53-20f705d84b96>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mreg1_model\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"reg1_20.dat\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mreg1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/xgboost/sklearn.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X, output_margin, ntree_limit, validate_features, base_margin, iteration_range)\u001b[0m\n\u001b[1;32m    813\u001b[0m         \"\"\"\n\u001b[1;32m    814\u001b[0m         iteration_range = _convert_ntree_limit(\n\u001b[0;32m--> 815\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_booster\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mntree_limit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miteration_range\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    816\u001b[0m         )\n\u001b[1;32m    817\u001b[0m         \u001b[0miteration_range\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_iteration_range\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miteration_range\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/xgboost/sklearn.py\u001b[0m in \u001b[0;36mget_booster\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    395\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_Booster'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    396\u001b[0m             \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 397\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'need to call fit or load_model beforehand'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    398\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_Booster\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNotFittedError\u001b[0m: need to call fit or load_model beforehand"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "MSE(yList,reg1_model.predict(index20))"
   ],
   "outputs": [
    {
     "output_type": "error",
     "ename": "NotFittedError",
     "evalue": "need to call fit or load_model beforehand",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNotFittedError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-39-eaf03dd3dfe5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mMSE\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0myList\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreg1_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/xgboost/sklearn.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X, output_margin, ntree_limit, validate_features, base_margin, iteration_range)\u001b[0m\n\u001b[1;32m    813\u001b[0m         \"\"\"\n\u001b[1;32m    814\u001b[0m         iteration_range = _convert_ntree_limit(\n\u001b[0;32m--> 815\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_booster\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mntree_limit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miteration_range\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    816\u001b[0m         )\n\u001b[1;32m    817\u001b[0m         \u001b[0miteration_range\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_iteration_range\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miteration_range\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/xgboost/sklearn.py\u001b[0m in \u001b[0;36mget_booster\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    395\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_Booster'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    396\u001b[0m             \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexceptions\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 397\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mNotFittedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'need to call fit or load_model beforehand'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    398\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_Booster\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNotFittedError\u001b[0m: need to call fit or load_model beforehand"
     ]
    }
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "orig_nbformat": 4,
  "language_info": {
   "name": "python",
   "version": "3.7.10",
   "mimetype": "text/x-python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "pygments_lexer": "ipython3",
   "nbconvert_exporter": "python",
   "file_extension": ".py"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3.7.10 64-bit ('deeplearning': conda)"
  },
  "interpreter": {
   "hash": "a2441be928683876ab9582b799059e1f9f764e7f9dd2044f4d30ad5ffc1df7fc"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}